rm(list = ls())
options(scipen = 999)

# Load the necessary libraries
library(Synth)
library(dplyr)
library(gsynth)
library(writexl)
library(ggplot2)

# Load panel data
data <- read.csv("data_WB_constant_2015.csv",sep=";")
data$country_name <- as.character(data$country_name)
data$country_code <- as.character(data$country_code)

country_left = c("Honduras")

treated_country = 'Chile'
intervention_year = 2014

# Identify the pre-treatment and post-treatment periods
pre_treatment_period <- c(1990:(intervention_year-1))   # Replace with the years of your pre-treatment period
post_treatment_period <- c(intervention_year: 2019)  # Replace with the years of your post-treatment period

# Create the outcome and covariates matrices

outcome <- data[data$country_name == treated_country, "gdp"]

covariate_names <- c(names(data)[4:12]) #all [4:12]

# Chequeando variables excluidas
names(data)[!names(data) %in% covariate_names]

covariates <- data %>% subset(country_name != treated_country) %>% select(all_of(covariate_names))
covariate_names
data <- data %>%
  mutate_at(c(all_of(covariate_names)), ~as.numeric(gsub(",", ".", .)))
covariates <- data %>% subset(country_name != treated_country) %>% select(all_of(covariate_names))
data$country_index = as.numeric(factor(data$country_name))


library(car)
model <- lm(gdp ~ ., data=data %>% select(c(all_of(c('gdp', covariate_names)))))
vif_values <- vif(model)
print(vif_values)

treated_country_index = unique(data$country_index[which(data$country_name == treated_country)])
left_country_index = unique(data$country_index[which(data$country_name == country_left)]) 
if(length(treated_country_index) > 1){treated_country_index = treated_country_index[1]}


# CV Explicito ------------------------------------------------------------
library(MSCMT)
data <- data %>% subset(!country_name %in% c(country_left) )
data = data %>% mutate(country_index = as.numeric(as.factor(country_name)))

data_long <- listFromLong(data, unit.variable="country_index", time.variable="year", unit.names.variable="country_name")
head(data_long)

treatment.identifier <- "Chile"
controls.identifier  <- setdiff(colnames(data_long[[1]]),
                                c(treatment.identifier))

# Períodos de entrenamiento y validación para la variable dependiente
times.dep <- cbind("gdp" = c(1990, 2013)) #treatment (2014, therefore treat-1=2013, same as line 19/22)

# Períodos de entrenamiento y validación para los predictores
times.pred <- cbind("pop"                            = c(1990, 2013),
                    "school"                         = c(1990, 2013),
                    "adol_fert"                      = c(1990, 2013),
                    "birth_rate"                     = c(1990, 2013),
                    "gov_cons"                       = c(1990, 2013),
                    "invest"                         = c(1990, 2013),
                    "open"                           = c(1990, 2013),
                    "school"                         = c(1990, 2013))

agg.fns <- rep("mean", ncol(times.pred))


############################################################################################################
# Definiendo periodos CV
year_fin_training_cv_1 = 1998

periodo_training_1 = c(1990,year_fin_training_cv_1)
periodo_validation_1 = c(periodo_training_1[2] + 1, 2013)

############################################################################################################
### Definiendo CV 1
# Periodo de entrenamiento para las variables predictoras
times.pred.training_1 <- cbind("pop"                            = periodo_training_1,
                               "school"                         = periodo_training_1,
                               "adol_fert"                      = periodo_training_1,
                               "birth_rate"                     = periodo_training_1,
                               "gov_cons"                       = periodo_training_1,
                               "invest"                         = periodo_training_1,
                               "open"                           = periodo_training_1,
                               "school"                         = periodo_training_1)

# Periodo de validación para la variable dependiente
times.dep.validation_1 <- cbind("gdp" = periodo_validation_1)
############################################################################################################

############################################################################################################
###### Entrenando Primera CV
# Uso de la función mscmt con validación cruzada
res_cv_1 <- mscmt(
  data = data_long,
  treatment.identifier = treatment.identifier,
  controls.identifier = controls.identifier,
  times.dep = times.dep,
  times.pred = times.pred,
  agg.fns = agg.fns,
  seed = 42,
  verbose = TRUE,
  debug = T,
  times.pred.training = times.pred.training_1,
  times.dep.validation = times.dep.validation_1
)


ggplot(res_cv_1[[1]], type="comparison") # CV 1

#Results are then exported to an xlsx. file and plotted. 

